# Load the raw CSV (file name indicates 2018 gender-inequality and
# economic-indicator data).
data <- read.csv("gender_inequality_and_economic_indicators_2018.csv")

# ENTREPRENEURSHIP ----
# Keep only the identifying columns, the ENTREPRENEURSHIP column and the four
# legal-question columns. Selecting by name fails with
# "undefined columns selected" if any of them is missing from the CSV.
data <- data[c(
  "Region",
  "Economy",
  "ENTREPRENEURSHIP",
  "Does.the.law.prohibit.discrimination.in.access.to.credit.based.on.gender.",
  "Can.a.woman.sign.a.contract.in.the.same.way.as.a.man.",
  "Can.a.woman.register.a.business.in.the.same.way.as.a.man.",
  "Can.a.woman.open.a.bank.account.in.the.same.way.as.a.man."
)]
# Display the new data frame
head(data)
# Recorded console output of the call above, kept as comments so the script
# still parses. The wide data frame was printed in several column groups.
#>                       Region             Economy ENTREPRENEURSHIP
#> 1                 South Asia         afghanistan               75
#> 2      Europe & Central Asia             albania              100
#> 3 Middle East & North Africa             algeria               75
#> 4         Sub-Saharan Africa              angola              100
#> 5  Latin America & Caribbean antigua and barbuda               75
#> 6  Latin America & Caribbean           argentina               75
#>   Does.the.law.prohibit.discrimination.in.access.to.credit.based.on.gender.
#> 1 No
#> 2 Yes
#> 3 No
#> 4 Yes
#> 5 No
#> 6 No
#>   Can.a.woman.sign.a.contract.in.the.same.way.as.a.man.
#> 1 Yes
#> 2 Yes
#> 3 Yes
#> 4 Yes
#> 5 Yes
#> 6 Yes
#>   Can.a.woman.register.a.business.in.the.same.way.as.a.man.
#> 1 Yes
#> 2 Yes
#> 3 Yes
#> 4 Yes
#> 5 Yes
#> 6 Yes
#>   Can.a.woman.open.a.bank.account.in.the.same.way.as.a.man.
#> 1 Yes
#> 2 Yes
#> 3 Yes
#> 4 Yes
#> 5 Yes
#> 6 Yes
# EDA ----
# Each statement below is on its own line; in the exported text several of
# them were fused together (e.g. `ggplotly(stacked_plt)contract_plt <- ...`),
# which R cannot parse. The repeated library(ggplot2) / library(plotly) calls
# were redundant and are loaded once here.
library(ggplot2)
library(plotly)

# Count frequencies
region_counts <- table(data$Region)

# Horizontal bar chart: number of rows in `data` per Region.
ggplot(data, aes(x = Region)) +
  geom_bar(fill = "steelblue") +
  coord_flip() +
  labs(title = "Distribution of Regions", x = "Region", y = "Count") +
  theme_minimal()

# The four plots below share one layout: a stacked bar per Region, filled by
# the answer to one legal question, with x labels rotated 45 degrees so the
# region names stay legible. ggplotly() converts each ggplot to an
# interactive plotly widget.

# Credit-discrimination law by region.
stacked_plt <- ggplot(
  data,
  aes(
    x = Region,
    fill = Does.the.law.prohibit.discrimination.in.access.to.credit.based.on.gender.
  )
) +
  geom_bar(position = "stack") +
  labs(
    title = "Does the Law Prohibit Discrimination in Access to Credit Based on Gender by Region",
    x = "Region",
    y = "Count",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(stacked_plt)

# Signing a contract by region.
contract_plt <- ggplot(
  data,
  aes(
    x = Region,
    fill = Can.a.woman.sign.a.contract.in.the.same.way.as.a.man.
  )
) +
  geom_bar(position = "stack") +
  labs(
    title = "Can a Woman Sign a Contract in the Same Way as a Man by Region",
    x = "Region",
    y = "Count",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(contract_plt)

# Registering a business by region.
business_plt <- ggplot(
  data,
  aes(
    x = Region,
    fill = Can.a.woman.register.a.business.in.the.same.way.as.a.man.
  )
) +
  geom_bar(position = "stack") +
  labs(
    title = "Can a Woman Register a Business in the Same Way as a Man by Region",
    x = "Region",
    y = "Count",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(business_plt)

# Opening a bank account by region.
bank_account_plt <- ggplot(
  data,
  aes(
    x = Region,
    fill = Can.a.woman.open.a.bank.account.in.the.same.way.as.a.man.
  )
) +
  geom_bar(position = "stack") +
  labs(
    title = "Can a Woman Open a Bank Account in the Same Way as a Man by Region",
    x = "Region",
    y = "Count",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(bank_account_plt)

# Attached here, after plotly, to keep the original attach order.
library(dplyr)
# Cross-tabulate Region (rows) against the credit-discrimination answer
# (columns).
contingency_table <- table(
  data$Region,
  data$Does.the.law.prohibit.discrimination.in.access.to.credit.based.on.gender.
)
print(contingency_table)
# Recorded console output, kept as comments so the script still parses.
#>                              No Yes
#>   East Asia & Pacific        18   7
#>   Europe & Central Asia       8  15
#>   High income: OECD           5  29
#>   Latin America & Caribbean  21  11
#>   Middle East & North Africa 17   3
#>   South Asia                  7   1
#>   Sub-Saharan Africa         39   9
# Pearson chi-squared test of independence on the Region x credit-answer
# table built above.
# NOTE(review): the recorded table has small cells (e.g. South Asia / Yes = 1),
# so chisq.test() may warn that the approximation is unreliable. Consider
# simulate.p.value = TRUE or fisher.test() as a cross-check. TODO confirm.
chi_test <- chisq.test(contingency_table)
cat("P-value:", chi_test$p.value, "\n")
#> P-value: 7.168982e-10
print(chi_test)
# Recorded console output, kept as comments so the script still parses.
#> Pearson's Chi-squared test
#> data: contingency_table
#> X-squared = 54.062, df = 6, p-value = 7.169e-10
# Two-sample t-test of ENTREPRENEURSHIP between the No and Yes groups of the
# credit-discrimination question. t.test() defaults to var.equal = FALSE,
# i.e. the Welch test reported in the output below.
res1 <- t.test(
  ENTREPRENEURSHIP ~
    Does.the.law.prohibit.discrimination.in.access.to.credit.based.on.gender.,
  data = data
)
print(res1)
# Recorded console output, kept as comments so the script still parses.
#> Welch Two Sample t-test
#> data: ENTREPRENEURSHIP by Does.the.law.prohibit.discrimination.in.access.to.credit.based.on.gender.
#> t = -23.72, df = 133.49, p-value < 2.2e-16
#> alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
#> 95 percent confidence interval:
#> -30.02074 -25.39955
#> sample estimates:
#> mean in group No mean in group Yes
#>         71.95652          99.66667
# Two-sample t-test of ENTREPRENEURSHIP between the No and Yes groups of the
# contract-signing question (Welch test, the t.test() default).
# NOTE(review): the recorded output shows mean 0 for group No and a whole-number
# Welch df of 187, which suggests a very small, zero-variance "No" group.
# Treat the t statistic with caution and check the group sizes, e.g. with
# table(data$Can.a.woman.sign.a.contract.in.the.same.way.as.a.man.).
res2 <- t.test(
  ENTREPRENEURSHIP ~ Can.a.woman.sign.a.contract.in.the.same.way.as.a.man.,
  data = data
)
print(res2)
# Recorded console output, kept as comments so the script still parses.
#> Welch Two Sample t-test
#> data: ENTREPRENEURSHIP by Can.a.woman.sign.a.contract.in.the.same.way.as.a.man.
#> t = -80.561, df = 187, p-value < 2.2e-16
#> alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
#> 95 percent confidence interval:
#> -85.82807 -81.72512
#> sample estimates:
#> mean in group No mean in group Yes
#>           0.0000           83.7766
# Cross-tabulate Region against the business-registration answer and run a
# Pearson chi-squared test of independence on the resulting table.
business_table <- table(
  data$Region,
  data$Can.a.woman.register.a.business.in.the.same.way.as.a.man.
)
business_chi_test <- chisq.test(business_table)
print(business_chi_test)
# Recorded console output, kept as comments so the script still parses.
#> Pearson's Chi-squared test
#> data: business_table
#> X-squared = 9.8134, df = 6, p-value = 0.1327